'''
Created on Mar 5, 2012
@author: oabalbin
'''

import os
import sys
import glob
import subprocess
from collections import defaultdict

JOB_ERROR=1
JOB_SUCCESS=0


def read_files_folder(folderpath,ext):
    '''
    Map sample names to the files in a folder whose names end with ``ext``.

    folderpath: directory to search (only its direct entries are matched).
    ext: filename suffix to match. It is appended verbatim to '*' to build
         the glob pattern, so no leading dot is added for the caller.

    Returns a defaultdict created without a default factory (so a missing
    key still raises KeyError) that maps the part of each matching filename
    before its first '.' to os.path.join(folderpath, filename). When several
    files share that prefix, the one glob yields last overwrites the others.
    '''
    myfiles=defaultdict()
    for infile in glob.glob( os.path.join(folderpath, '*'+ext) ):
        # os.path.basename rather than split('/') so the filename is
        # extracted correctly whatever path separator the platform uses.
        filename=os.path.basename(infile)
        # The sample name is everything before the first dot.
        sp=filename.split('.')[0]
        myfiles[sp]=os.path.join(folderpath,filename)
    return myfiles

if __name__ == '__main__':
    # Merge the per-sample SNV tables into one master table.
    # Samples are discovered from the VCF files in rootdirsamples; for each
    # sample name the table rootdir/<sample>/<snpfilename> is appended to the
    # master file with the sample name prepended as the first column.

    ext='snps.vars.raw.2.vcf'
    rootdirsamples='/data/projects/tcgatest/vcf_files'
    rootdir='/data/projects/tcgatest'
    snpfilename='somatic.vars.txt_all'

    samples_dict=read_files_folder(rootdirsamples,ext)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(samples_dict)

    # Bail out before touching the output so that an empty sample set does
    # not leave behind a truncated master file.
    if not samples_dict:
        sys.stderr.write('No *%s files found in %s\n' % (ext, rootdirsamples))
        sys.exit(JOB_ERROR)

    # Context managers guarantee both files are closed even if a sample
    # table is missing or unreadable.
    with open(os.path.join(rootdir,'master_SNV_PRAD.txt'),'w') as masterSNV:
        header_written=False
        for sample in samples_dict:
            with open(os.path.join(rootdir,sample,snpfilename)) as snpfile:
                # First line of every sample table is its header.
                hd=next(snpfile,None)
                if hd is None:
                    # Empty table: nothing to contribute.
                    continue
                if not header_written:
                    # Any sample can supply the header; use the first
                    # non-empty one instead of opening a file twice.
                    if not hd.endswith('\n'):
                        hd+='\n'
                    masterSNV.write('#sample'+'\t'+hd)
                    header_written=True
                for line in snpfile:
                    # A final line without a newline would otherwise be
                    # fused with the next sample's first row.
                    if not line.endswith('\n'):
                        line+='\n'
                    masterSNV.write(sample+'\t'+line)
    